decoder.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052
  1. import datetime
  2. import io
  3. from os import linesep
  4. import re
  5. import sys
  6. from pip._vendor.toml.tz import TomlTz
  7. if sys.version_info < (3,):
  8. _range = xrange # noqa: F821
  9. else:
  10. unicode = str
  11. _range = range
  12. basestring = str
  13. unichr = chr
  14. def _detect_pathlib_path(p):
  15. if (3, 4) <= sys.version_info:
  16. import pathlib
  17. if isinstance(p, pathlib.PurePath):
  18. return True
  19. return False
  20. def _ispath(p):
  21. if isinstance(p, (bytes, basestring)):
  22. return True
  23. return _detect_pathlib_path(p)
  24. def _getpath(p):
  25. if (3, 6) <= sys.version_info:
  26. import os
  27. return os.fspath(p)
  28. if _detect_pathlib_path(p):
  29. return str(p)
  30. return p
  31. try:
  32. FNFError = FileNotFoundError
  33. except NameError:
  34. FNFError = IOError
# Local time "HH:MM:SS" with an optional fractional part of 3 to 6 digits.
# Groups: 1=hours, 2=minutes, 3=seconds, 4=".fraction", 5=fraction digits.
# The pattern has no anchors; this module only ever calls .match() on it, so
# it is tied to the start of the value but trailing text is not rejected.
TIME_RE = re.compile(r"([0-9]{2}):([0-9]{2}):([0-9]{2})(\.([0-9]{3,6}))?")
  36. class TomlDecodeError(ValueError):
  37. """Base toml Exception / Error."""
  38. def __init__(self, msg, doc, pos):
  39. lineno = doc.count('\n', 0, pos) + 1
  40. colno = pos - doc.rfind('\n', 0, pos)
  41. emsg = '{} (line {} column {} char {})'.format(msg, lineno, colno, pos)
  42. ValueError.__init__(self, emsg)
  43. self.msg = msg
  44. self.doc = doc
  45. self.pos = pos
  46. self.lineno = lineno
  47. self.colno = colno
# Matches a TOML number, which allows underscores for readability: a digit
# followed by any number of "_<digit>" groups.  TomlDecoder.load_line applies
# it with .match(), i.e. only at the start of the raw value, to decide
# whether underscores should be stripped before further parsing.
_number_with_underscores = re.compile('([0-9])(_([0-9]))*')
  50. class CommentValue(object):
  51. def __init__(self, val, comment, beginline, _dict):
  52. self.val = val
  53. separator = "\n" if beginline else " "
  54. self.comment = separator + comment
  55. self._dict = _dict
  56. def __getitem__(self, key):
  57. return self.val[key]
  58. def __setitem__(self, key, value):
  59. self.val[key] = value
  60. def dump(self, dump_value_func):
  61. retstr = dump_value_func(self.val)
  62. if isinstance(self.val, self._dict):
  63. return self.comment + "\n" + unicode(retstr)
  64. else:
  65. return unicode(retstr) + self.comment
  66. def _strictly_valid_num(n):
  67. n = n.strip()
  68. if not n:
  69. return False
  70. if n[0] == '_':
  71. return False
  72. if n[-1] == '_':
  73. return False
  74. if "_." in n or "._" in n:
  75. return False
  76. if len(n) == 1:
  77. return True
  78. if n[0] == '0' and n[1] not in ['.', 'o', 'b', 'x']:
  79. return False
  80. if n[0] == '+' or n[0] == '-':
  81. n = n[1:]
  82. if len(n) > 1 and n[0] == '0' and n[1] != '.':
  83. return False
  84. if '__' in n:
  85. return False
  86. return True
  87. def load(f, _dict=dict, decoder=None):
  88. """Parses named file or files as toml and returns a dictionary
  89. Args:
  90. f: Path to the file to open, array of files to read into single dict
  91. or a file descriptor
  92. _dict: (optional) Specifies the class of the returned toml dictionary
  93. decoder: The decoder to use
  94. Returns:
  95. Parsed toml file represented as a dictionary
  96. Raises:
  97. TypeError -- When f is invalid type
  98. TomlDecodeError: Error while decoding toml
  99. IOError / FileNotFoundError -- When an array with no valid (existing)
  100. (Python 2 / Python 3) file paths is passed
  101. """
  102. if _ispath(f):
  103. with io.open(_getpath(f), encoding='utf-8') as ffile:
  104. return loads(ffile.read(), _dict, decoder)
  105. elif isinstance(f, list):
  106. from os import path as op
  107. from warnings import warn
  108. if not [path for path in f if op.exists(path)]:
  109. error_msg = "Load expects a list to contain filenames only."
  110. error_msg += linesep
  111. error_msg += ("The list needs to contain the path of at least one "
  112. "existing file.")
  113. raise FNFError(error_msg)
  114. if decoder is None:
  115. decoder = TomlDecoder(_dict)
  116. d = decoder.get_empty_table()
  117. for l in f: # noqa: E741
  118. if op.exists(l):
  119. d.update(load(l, _dict, decoder))
  120. else:
  121. warn("Non-existent filename in list with at least one valid "
  122. "filename")
  123. return d
  124. else:
  125. try:
  126. return loads(f.read(), _dict, decoder)
  127. except AttributeError:
  128. raise TypeError("You can only load a file descriptor, filename or "
  129. "list")
# A bare (unquoted) table-name component: one or more ASCII letters, digits,
# underscores or dashes.  loads() rejects unquoted components that fail it.
_groupname_re = re.compile(r'^[A-Za-z0-9_-]+$')
  131. def loads(s, _dict=dict, decoder=None):
  132. """Parses string as toml
  133. Args:
  134. s: String to be parsed
  135. _dict: (optional) Specifies the class of the returned toml dictionary
  136. Returns:
  137. Parsed toml file represented as a dictionary
  138. Raises:
  139. TypeError: When a non-string is passed
  140. TomlDecodeError: Error while decoding toml
  141. """
  142. implicitgroups = []
  143. if decoder is None:
  144. decoder = TomlDecoder(_dict)
  145. retval = decoder.get_empty_table()
  146. currentlevel = retval
  147. if not isinstance(s, basestring):
  148. raise TypeError("Expecting something like a string")
  149. if not isinstance(s, unicode):
  150. s = s.decode('utf8')
  151. original = s
  152. sl = list(s)
  153. openarr = 0
  154. openstring = False
  155. openstrchar = ""
  156. multilinestr = False
  157. arrayoftables = False
  158. beginline = True
  159. keygroup = False
  160. dottedkey = False
  161. keyname = 0
  162. key = ''
  163. prev_key = ''
  164. line_no = 1
  165. for i, item in enumerate(sl):
  166. if item == '\r' and sl[i + 1] == '\n':
  167. sl[i] = ' '
  168. continue
  169. if keyname:
  170. key += item
  171. if item == '\n':
  172. raise TomlDecodeError("Key name found without value."
  173. " Reached end of line.", original, i)
  174. if openstring:
  175. if item == openstrchar:
  176. oddbackslash = False
  177. k = 1
  178. while i >= k and sl[i - k] == '\\':
  179. oddbackslash = not oddbackslash
  180. k += 1
  181. if not oddbackslash:
  182. keyname = 2
  183. openstring = False
  184. openstrchar = ""
  185. continue
  186. elif keyname == 1:
  187. if item.isspace():
  188. keyname = 2
  189. continue
  190. elif item == '.':
  191. dottedkey = True
  192. continue
  193. elif item.isalnum() or item == '_' or item == '-':
  194. continue
  195. elif (dottedkey and sl[i - 1] == '.' and
  196. (item == '"' or item == "'")):
  197. openstring = True
  198. openstrchar = item
  199. continue
  200. elif keyname == 2:
  201. if item.isspace():
  202. if dottedkey:
  203. nextitem = sl[i + 1]
  204. if not nextitem.isspace() and nextitem != '.':
  205. keyname = 1
  206. continue
  207. if item == '.':
  208. dottedkey = True
  209. nextitem = sl[i + 1]
  210. if not nextitem.isspace() and nextitem != '.':
  211. keyname = 1
  212. continue
  213. if item == '=':
  214. keyname = 0
  215. prev_key = key[:-1].rstrip()
  216. key = ''
  217. dottedkey = False
  218. else:
  219. raise TomlDecodeError("Found invalid character in key name: '" +
  220. item + "'. Try quoting the key name.",
  221. original, i)
  222. if item == "'" and openstrchar != '"':
  223. k = 1
  224. try:
  225. while sl[i - k] == "'":
  226. k += 1
  227. if k == 3:
  228. break
  229. except IndexError:
  230. pass
  231. if k == 3:
  232. multilinestr = not multilinestr
  233. openstring = multilinestr
  234. else:
  235. openstring = not openstring
  236. if openstring:
  237. openstrchar = "'"
  238. else:
  239. openstrchar = ""
  240. if item == '"' and openstrchar != "'":
  241. oddbackslash = False
  242. k = 1
  243. tripquote = False
  244. try:
  245. while sl[i - k] == '"':
  246. k += 1
  247. if k == 3:
  248. tripquote = True
  249. break
  250. if k == 1 or (k == 3 and tripquote):
  251. while sl[i - k] == '\\':
  252. oddbackslash = not oddbackslash
  253. k += 1
  254. except IndexError:
  255. pass
  256. if not oddbackslash:
  257. if tripquote:
  258. multilinestr = not multilinestr
  259. openstring = multilinestr
  260. else:
  261. openstring = not openstring
  262. if openstring:
  263. openstrchar = '"'
  264. else:
  265. openstrchar = ""
  266. if item == '#' and (not openstring and not keygroup and
  267. not arrayoftables):
  268. j = i
  269. comment = ""
  270. try:
  271. while sl[j] != '\n':
  272. comment += s[j]
  273. sl[j] = ' '
  274. j += 1
  275. except IndexError:
  276. break
  277. if not openarr:
  278. decoder.preserve_comment(line_no, prev_key, comment, beginline)
  279. if item == '[' and (not openstring and not keygroup and
  280. not arrayoftables):
  281. if beginline:
  282. if len(sl) > i + 1 and sl[i + 1] == '[':
  283. arrayoftables = True
  284. else:
  285. keygroup = True
  286. else:
  287. openarr += 1
  288. if item == ']' and not openstring:
  289. if keygroup:
  290. keygroup = False
  291. elif arrayoftables:
  292. if sl[i - 1] == ']':
  293. arrayoftables = False
  294. else:
  295. openarr -= 1
  296. if item == '\n':
  297. if openstring or multilinestr:
  298. if not multilinestr:
  299. raise TomlDecodeError("Unbalanced quotes", original, i)
  300. if ((sl[i - 1] == "'" or sl[i - 1] == '"') and (
  301. sl[i - 2] == sl[i - 1])):
  302. sl[i] = sl[i - 1]
  303. if sl[i - 3] == sl[i - 1]:
  304. sl[i - 3] = ' '
  305. elif openarr:
  306. sl[i] = ' '
  307. else:
  308. beginline = True
  309. line_no += 1
  310. elif beginline and sl[i] != ' ' and sl[i] != '\t':
  311. beginline = False
  312. if not keygroup and not arrayoftables:
  313. if sl[i] == '=':
  314. raise TomlDecodeError("Found empty keyname. ", original, i)
  315. keyname = 1
  316. key += item
  317. if keyname:
  318. raise TomlDecodeError("Key name found without value."
  319. " Reached end of file.", original, len(s))
  320. if openstring: # reached EOF and have an unterminated string
  321. raise TomlDecodeError("Unterminated string found."
  322. " Reached end of file.", original, len(s))
  323. s = ''.join(sl)
  324. s = s.split('\n')
  325. multikey = None
  326. multilinestr = ""
  327. multibackslash = False
  328. pos = 0
  329. for idx, line in enumerate(s):
  330. if idx > 0:
  331. pos += len(s[idx - 1]) + 1
  332. decoder.embed_comments(idx, currentlevel)
  333. if not multilinestr or multibackslash or '\n' not in multilinestr:
  334. line = line.strip()
  335. if line == "" and (not multikey or multibackslash):
  336. continue
  337. if multikey:
  338. if multibackslash:
  339. multilinestr += line
  340. else:
  341. multilinestr += line
  342. multibackslash = False
  343. closed = False
  344. if multilinestr[0] == '[':
  345. closed = line[-1] == ']'
  346. elif len(line) > 2:
  347. closed = (line[-1] == multilinestr[0] and
  348. line[-2] == multilinestr[0] and
  349. line[-3] == multilinestr[0])
  350. if closed:
  351. try:
  352. value, vtype = decoder.load_value(multilinestr)
  353. except ValueError as err:
  354. raise TomlDecodeError(str(err), original, pos)
  355. currentlevel[multikey] = value
  356. multikey = None
  357. multilinestr = ""
  358. else:
  359. k = len(multilinestr) - 1
  360. while k > -1 and multilinestr[k] == '\\':
  361. multibackslash = not multibackslash
  362. k -= 1
  363. if multibackslash:
  364. multilinestr = multilinestr[:-1]
  365. else:
  366. multilinestr += "\n"
  367. continue
  368. if line[0] == '[':
  369. arrayoftables = False
  370. if len(line) == 1:
  371. raise TomlDecodeError("Opening key group bracket on line by "
  372. "itself.", original, pos)
  373. if line[1] == '[':
  374. arrayoftables = True
  375. line = line[2:]
  376. splitstr = ']]'
  377. else:
  378. line = line[1:]
  379. splitstr = ']'
  380. i = 1
  381. quotesplits = decoder._get_split_on_quotes(line)
  382. quoted = False
  383. for quotesplit in quotesplits:
  384. if not quoted and splitstr in quotesplit:
  385. break
  386. i += quotesplit.count(splitstr)
  387. quoted = not quoted
  388. line = line.split(splitstr, i)
  389. if len(line) < i + 1 or line[-1].strip() != "":
  390. raise TomlDecodeError("Key group not on a line by itself.",
  391. original, pos)
  392. groups = splitstr.join(line[:-1]).split('.')
  393. i = 0
  394. while i < len(groups):
  395. groups[i] = groups[i].strip()
  396. if len(groups[i]) > 0 and (groups[i][0] == '"' or
  397. groups[i][0] == "'"):
  398. groupstr = groups[i]
  399. j = i + 1
  400. while not groupstr[0] == groupstr[-1]:
  401. j += 1
  402. if j > len(groups) + 2:
  403. raise TomlDecodeError("Invalid group name '" +
  404. groupstr + "' Something " +
  405. "went wrong.", original, pos)
  406. groupstr = '.'.join(groups[i:j]).strip()
  407. groups[i] = groupstr[1:-1]
  408. groups[i + 1:j] = []
  409. else:
  410. if not _groupname_re.match(groups[i]):
  411. raise TomlDecodeError("Invalid group name '" +
  412. groups[i] + "'. Try quoting it.",
  413. original, pos)
  414. i += 1
  415. currentlevel = retval
  416. for i in _range(len(groups)):
  417. group = groups[i]
  418. if group == "":
  419. raise TomlDecodeError("Can't have a keygroup with an empty "
  420. "name", original, pos)
  421. try:
  422. currentlevel[group]
  423. if i == len(groups) - 1:
  424. if group in implicitgroups:
  425. implicitgroups.remove(group)
  426. if arrayoftables:
  427. raise TomlDecodeError("An implicitly defined "
  428. "table can't be an array",
  429. original, pos)
  430. elif arrayoftables:
  431. currentlevel[group].append(decoder.get_empty_table()
  432. )
  433. else:
  434. raise TomlDecodeError("What? " + group +
  435. " already exists?" +
  436. str(currentlevel),
  437. original, pos)
  438. except TypeError:
  439. currentlevel = currentlevel[-1]
  440. if group not in currentlevel:
  441. currentlevel[group] = decoder.get_empty_table()
  442. if i == len(groups) - 1 and arrayoftables:
  443. currentlevel[group] = [decoder.get_empty_table()]
  444. except KeyError:
  445. if i != len(groups) - 1:
  446. implicitgroups.append(group)
  447. currentlevel[group] = decoder.get_empty_table()
  448. if i == len(groups) - 1 and arrayoftables:
  449. currentlevel[group] = [decoder.get_empty_table()]
  450. currentlevel = currentlevel[group]
  451. if arrayoftables:
  452. try:
  453. currentlevel = currentlevel[-1]
  454. except KeyError:
  455. pass
  456. elif line[0] == "{":
  457. if line[-1] != "}":
  458. raise TomlDecodeError("Line breaks are not allowed in inline"
  459. "objects", original, pos)
  460. try:
  461. decoder.load_inline_object(line, currentlevel, multikey,
  462. multibackslash)
  463. except ValueError as err:
  464. raise TomlDecodeError(str(err), original, pos)
  465. elif "=" in line:
  466. try:
  467. ret = decoder.load_line(line, currentlevel, multikey,
  468. multibackslash)
  469. except ValueError as err:
  470. raise TomlDecodeError(str(err), original, pos)
  471. if ret is not None:
  472. multikey, multilinestr, multibackslash = ret
  473. return retval
  474. def _load_date(val):
  475. microsecond = 0
  476. tz = None
  477. try:
  478. if len(val) > 19:
  479. if val[19] == '.':
  480. if val[-1].upper() == 'Z':
  481. subsecondval = val[20:-1]
  482. tzval = "Z"
  483. else:
  484. subsecondvalandtz = val[20:]
  485. if '+' in subsecondvalandtz:
  486. splitpoint = subsecondvalandtz.index('+')
  487. subsecondval = subsecondvalandtz[:splitpoint]
  488. tzval = subsecondvalandtz[splitpoint:]
  489. elif '-' in subsecondvalandtz:
  490. splitpoint = subsecondvalandtz.index('-')
  491. subsecondval = subsecondvalandtz[:splitpoint]
  492. tzval = subsecondvalandtz[splitpoint:]
  493. else:
  494. tzval = None
  495. subsecondval = subsecondvalandtz
  496. if tzval is not None:
  497. tz = TomlTz(tzval)
  498. microsecond = int(int(subsecondval) *
  499. (10 ** (6 - len(subsecondval))))
  500. else:
  501. tz = TomlTz(val[19:])
  502. except ValueError:
  503. tz = None
  504. if "-" not in val[1:]:
  505. return None
  506. try:
  507. if len(val) == 10:
  508. d = datetime.date(
  509. int(val[:4]), int(val[5:7]),
  510. int(val[8:10]))
  511. else:
  512. d = datetime.datetime(
  513. int(val[:4]), int(val[5:7]),
  514. int(val[8:10]), int(val[11:13]),
  515. int(val[14:16]), int(val[17:19]), microsecond, tz)
  516. except ValueError:
  517. return None
  518. return d
  519. def _load_unicode_escapes(v, hexbytes, prefix):
  520. skip = False
  521. i = len(v) - 1
  522. while i > -1 and v[i] == '\\':
  523. skip = not skip
  524. i -= 1
  525. for hx in hexbytes:
  526. if skip:
  527. skip = False
  528. i = len(hx) - 1
  529. while i > -1 and hx[i] == '\\':
  530. skip = not skip
  531. i -= 1
  532. v += prefix
  533. v += hx
  534. continue
  535. hxb = ""
  536. i = 0
  537. hxblen = 4
  538. if prefix == "\\U":
  539. hxblen = 8
  540. hxb = ''.join(hx[i:i + hxblen]).lower()
  541. if hxb.strip('0123456789abcdef'):
  542. raise ValueError("Invalid escape sequence: " + hxb)
  543. if hxb[0] == "d" and hxb[1].strip('01234567'):
  544. raise ValueError("Invalid escape sequence: " + hxb +
  545. ". Only scalar unicode points are allowed.")
  546. v += unichr(int(hxb, 16))
  547. v += unicode(hx[len(hxb):])
  548. return v
  549. # Unescape TOML string values.
  550. # content after the \
  551. _escapes = ['0', 'b', 'f', 'n', 'r', 't', '"']
  552. # What it should be replaced by
  553. _escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"']
  554. # Used for substitution
  555. _escape_to_escapedchars = dict(zip(_escapes, _escapedchars))
  556. def _unescape(v):
  557. """Unescape characters in a TOML string."""
  558. i = 0
  559. backslash = False
  560. while i < len(v):
  561. if backslash:
  562. backslash = False
  563. if v[i] in _escapes:
  564. v = v[:i - 1] + _escape_to_escapedchars[v[i]] + v[i + 1:]
  565. elif v[i] == '\\':
  566. v = v[:i - 1] + v[i:]
  567. elif v[i] == 'u' or v[i] == 'U':
  568. i += 1
  569. else:
  570. raise ValueError("Reserved escape sequence used")
  571. continue
  572. elif v[i] == '\\':
  573. backslash = True
  574. i += 1
  575. return v
class InlineTableDict(object):
    """Sentinel base class marking inline tables.

    It derives from ``object``, not ``dict``; TomlDecoder combines it with
    the configured dict class in ``get_empty_inline_table``.
    """
  578. class TomlDecoder(object):
  579. def __init__(self, _dict=dict):
  580. self._dict = _dict
  581. def get_empty_table(self):
  582. return self._dict()
  583. def get_empty_inline_table(self):
  584. class DynamicInlineTableDict(self._dict, InlineTableDict):
  585. """Concrete sentinel subclass for inline tables.
  586. It is a subclass of _dict which is passed in dynamically at load
  587. time
  588. It is also a subclass of InlineTableDict
  589. """
  590. return DynamicInlineTableDict()
  591. def load_inline_object(self, line, currentlevel, multikey=False,
  592. multibackslash=False):
  593. candidate_groups = line[1:-1].split(",")
  594. groups = []
  595. if len(candidate_groups) == 1 and not candidate_groups[0].strip():
  596. candidate_groups.pop()
  597. while len(candidate_groups) > 0:
  598. candidate_group = candidate_groups.pop(0)
  599. try:
  600. _, value = candidate_group.split('=', 1)
  601. except ValueError:
  602. raise ValueError("Invalid inline table encountered")
  603. value = value.strip()
  604. if ((value[0] == value[-1] and value[0] in ('"', "'")) or (
  605. value[0] in '-0123456789' or
  606. value in ('true', 'false') or
  607. (value[0] == "[" and value[-1] == "]") or
  608. (value[0] == '{' and value[-1] == '}'))):
  609. groups.append(candidate_group)
  610. elif len(candidate_groups) > 0:
  611. candidate_groups[0] = (candidate_group + "," +
  612. candidate_groups[0])
  613. else:
  614. raise ValueError("Invalid inline table value encountered")
  615. for group in groups:
  616. status = self.load_line(group, currentlevel, multikey,
  617. multibackslash)
  618. if status is not None:
  619. break
  620. def _get_split_on_quotes(self, line):
  621. doublequotesplits = line.split('"')
  622. quoted = False
  623. quotesplits = []
  624. if len(doublequotesplits) > 1 and "'" in doublequotesplits[0]:
  625. singlequotesplits = doublequotesplits[0].split("'")
  626. doublequotesplits = doublequotesplits[1:]
  627. while len(singlequotesplits) % 2 == 0 and len(doublequotesplits):
  628. singlequotesplits[-1] += '"' + doublequotesplits[0]
  629. doublequotesplits = doublequotesplits[1:]
  630. if "'" in singlequotesplits[-1]:
  631. singlequotesplits = (singlequotesplits[:-1] +
  632. singlequotesplits[-1].split("'"))
  633. quotesplits += singlequotesplits
  634. for doublequotesplit in doublequotesplits:
  635. if quoted:
  636. quotesplits.append(doublequotesplit)
  637. else:
  638. quotesplits += doublequotesplit.split("'")
  639. quoted = not quoted
  640. return quotesplits
  641. def load_line(self, line, currentlevel, multikey, multibackslash):
  642. i = 1
  643. quotesplits = self._get_split_on_quotes(line)
  644. quoted = False
  645. for quotesplit in quotesplits:
  646. if not quoted and '=' in quotesplit:
  647. break
  648. i += quotesplit.count('=')
  649. quoted = not quoted
  650. pair = line.split('=', i)
  651. strictly_valid = _strictly_valid_num(pair[-1])
  652. if _number_with_underscores.match(pair[-1]):
  653. pair[-1] = pair[-1].replace('_', '')
  654. while len(pair[-1]) and (pair[-1][0] != ' ' and pair[-1][0] != '\t' and
  655. pair[-1][0] != "'" and pair[-1][0] != '"' and
  656. pair[-1][0] != '[' and pair[-1][0] != '{' and
  657. pair[-1].strip() != 'true' and
  658. pair[-1].strip() != 'false'):
  659. try:
  660. float(pair[-1])
  661. break
  662. except ValueError:
  663. pass
  664. if _load_date(pair[-1]) is not None:
  665. break
  666. if TIME_RE.match(pair[-1]):
  667. break
  668. i += 1
  669. prev_val = pair[-1]
  670. pair = line.split('=', i)
  671. if prev_val == pair[-1]:
  672. raise ValueError("Invalid date or number")
  673. if strictly_valid:
  674. strictly_valid = _strictly_valid_num(pair[-1])
  675. pair = ['='.join(pair[:-1]).strip(), pair[-1].strip()]
  676. if '.' in pair[0]:
  677. if '"' in pair[0] or "'" in pair[0]:
  678. quotesplits = self._get_split_on_quotes(pair[0])
  679. quoted = False
  680. levels = []
  681. for quotesplit in quotesplits:
  682. if quoted:
  683. levels.append(quotesplit)
  684. else:
  685. levels += [level.strip() for level in
  686. quotesplit.split('.')]
  687. quoted = not quoted
  688. else:
  689. levels = pair[0].split('.')
  690. while levels[-1] == "":
  691. levels = levels[:-1]
  692. for level in levels[:-1]:
  693. if level == "":
  694. continue
  695. if level not in currentlevel:
  696. currentlevel[level] = self.get_empty_table()
  697. currentlevel = currentlevel[level]
  698. pair[0] = levels[-1].strip()
  699. elif (pair[0][0] == '"' or pair[0][0] == "'") and \
  700. (pair[0][-1] == pair[0][0]):
  701. pair[0] = _unescape(pair[0][1:-1])
  702. k, koffset = self._load_line_multiline_str(pair[1])
  703. if k > -1:
  704. while k > -1 and pair[1][k + koffset] == '\\':
  705. multibackslash = not multibackslash
  706. k -= 1
  707. if multibackslash:
  708. multilinestr = pair[1][:-1]
  709. else:
  710. multilinestr = pair[1] + "\n"
  711. multikey = pair[0]
  712. else:
  713. value, vtype = self.load_value(pair[1], strictly_valid)
  714. try:
  715. currentlevel[pair[0]]
  716. raise ValueError("Duplicate keys!")
  717. except TypeError:
  718. raise ValueError("Duplicate keys!")
  719. except KeyError:
  720. if multikey:
  721. return multikey, multilinestr, multibackslash
  722. else:
  723. currentlevel[pair[0]] = value
  724. def _load_line_multiline_str(self, p):
  725. poffset = 0
  726. if len(p) < 3:
  727. return -1, poffset
  728. if p[0] == '[' and (p.strip()[-1] != ']' and
  729. self._load_array_isstrarray(p)):
  730. newp = p[1:].strip().split(',')
  731. while len(newp) > 1 and newp[-1][0] != '"' and newp[-1][0] != "'":
  732. newp = newp[:-2] + [newp[-2] + ',' + newp[-1]]
  733. newp = newp[-1]
  734. poffset = len(p) - len(newp)
  735. p = newp
  736. if p[0] != '"' and p[0] != "'":
  737. return -1, poffset
  738. if p[1] != p[0] or p[2] != p[0]:
  739. return -1, poffset
  740. if len(p) > 5 and p[-1] == p[0] and p[-2] == p[0] and p[-3] == p[0]:
  741. return -1, poffset
  742. return len(p) - 1, poffset
  743. def load_value(self, v, strictly_valid=True):
  744. if not v:
  745. raise ValueError("Empty value is invalid")
  746. if v == 'true':
  747. return (True, "bool")
  748. elif v == 'false':
  749. return (False, "bool")
  750. elif v[0] == '"' or v[0] == "'":
  751. quotechar = v[0]
  752. testv = v[1:].split(quotechar)
  753. triplequote = False
  754. triplequotecount = 0
  755. if len(testv) > 1 and testv[0] == '' and testv[1] == '':
  756. testv = testv[2:]
  757. triplequote = True
  758. closed = False
  759. for tv in testv:
  760. if tv == '':
  761. if triplequote:
  762. triplequotecount += 1
  763. else:
  764. closed = True
  765. else:
  766. oddbackslash = False
  767. try:
  768. i = -1
  769. j = tv[i]
  770. while j == '\\':
  771. oddbackslash = not oddbackslash
  772. i -= 1
  773. j = tv[i]
  774. except IndexError:
  775. pass
  776. if not oddbackslash:
  777. if closed:
  778. raise ValueError("Found tokens after a closed " +
  779. "string. Invalid TOML.")
  780. else:
  781. if not triplequote or triplequotecount > 1:
  782. closed = True
  783. else:
  784. triplequotecount = 0
  785. if quotechar == '"':
  786. escapeseqs = v.split('\\')[1:]
  787. backslash = False
  788. for i in escapeseqs:
  789. if i == '':
  790. backslash = not backslash
  791. else:
  792. if i[0] not in _escapes and (i[0] != 'u' and
  793. i[0] != 'U' and
  794. not backslash):
  795. raise ValueError("Reserved escape sequence used")
  796. if backslash:
  797. backslash = False
  798. for prefix in ["\\u", "\\U"]:
  799. if prefix in v:
  800. hexbytes = v.split(prefix)
  801. v = _load_unicode_escapes(hexbytes[0], hexbytes[1:],
  802. prefix)
  803. v = _unescape(v)
  804. if len(v) > 1 and v[1] == quotechar and (len(v) < 3 or
  805. v[1] == v[2]):
  806. v = v[2:-2]
  807. return (v[1:-1], "str")
  808. elif v[0] == '[':
  809. return (self.load_array(v), "array")
  810. elif v[0] == '{':
  811. inline_object = self.get_empty_inline_table()
  812. self.load_inline_object(v, inline_object)
  813. return (inline_object, "inline_object")
  814. elif TIME_RE.match(v):
  815. h, m, s, _, ms = TIME_RE.match(v).groups()
  816. time = datetime.time(int(h), int(m), int(s), int(ms) if ms else 0)
  817. return (time, "time")
  818. else:
  819. parsed_date = _load_date(v)
  820. if parsed_date is not None:
  821. return (parsed_date, "date")
  822. if not strictly_valid:
  823. raise ValueError("Weirdness with leading zeroes or "
  824. "underscores in your number.")
  825. itype = "int"
  826. neg = False
  827. if v[0] == '-':
  828. neg = True
  829. v = v[1:]
  830. elif v[0] == '+':
  831. v = v[1:]
  832. v = v.replace('_', '')
  833. lowerv = v.lower()
  834. if '.' in v or ('x' not in v and ('e' in v or 'E' in v)):
  835. if '.' in v and v.split('.', 1)[1] == '':
  836. raise ValueError("This float is missing digits after "
  837. "the point")
  838. if v[0] not in '0123456789':
  839. raise ValueError("This float doesn't have a leading "
  840. "digit")
  841. v = float(v)
  842. itype = "float"
  843. elif len(lowerv) == 3 and (lowerv == 'inf' or lowerv == 'nan'):
  844. v = float(v)
  845. itype = "float"
  846. if itype == "int":
  847. v = int(v, 0)
  848. if neg:
  849. return (0 - v, itype)
  850. return (v, itype)
  851. def bounded_string(self, s):
  852. if len(s) == 0:
  853. return True
  854. if s[-1] != s[0]:
  855. return False
  856. i = -2
  857. backslash = False
  858. while len(s) + i > 0:
  859. if s[i] == "\\":
  860. backslash = not backslash
  861. i -= 1
  862. else:
  863. break
  864. return not backslash
  865. def _load_array_isstrarray(self, a):
  866. a = a[1:-1].strip()
  867. if a != '' and (a[0] == '"' or a[0] == "'"):
  868. return True
  869. return False
  870. def load_array(self, a):
  871. atype = None
  872. retval = []
  873. a = a.strip()
  874. if '[' not in a[1:-1] or "" != a[1:-1].split('[')[0].strip():
  875. strarray = self._load_array_isstrarray(a)
  876. if not a[1:-1].strip().startswith('{'):
  877. a = a[1:-1].split(',')
  878. else:
  879. # a is an inline object, we must find the matching parenthesis
  880. # to define groups
  881. new_a = []
  882. start_group_index = 1
  883. end_group_index = 2
  884. open_bracket_count = 1 if a[start_group_index] == '{' else 0
  885. in_str = False
  886. while end_group_index < len(a[1:]):
  887. if a[end_group_index] == '"' or a[end_group_index] == "'":
  888. if in_str:
  889. backslash_index = end_group_index - 1
  890. while (backslash_index > -1 and
  891. a[backslash_index] == '\\'):
  892. in_str = not in_str
  893. backslash_index -= 1
  894. in_str = not in_str
  895. if not in_str and a[end_group_index] == '{':
  896. open_bracket_count += 1
  897. if in_str or a[end_group_index] != '}':
  898. end_group_index += 1
  899. continue
  900. elif a[end_group_index] == '}' and open_bracket_count > 1:
  901. open_bracket_count -= 1
  902. end_group_index += 1
  903. continue
  904. # Increase end_group_index by 1 to get the closing bracket
  905. end_group_index += 1
  906. new_a.append(a[start_group_index:end_group_index])
  907. # The next start index is at least after the closing
  908. # bracket, a closing bracket can be followed by a comma
  909. # since we are in an array.
  910. start_group_index = end_group_index + 1
  911. while (start_group_index < len(a[1:]) and
  912. a[start_group_index] != '{'):
  913. start_group_index += 1
  914. end_group_index = start_group_index + 1
  915. a = new_a
  916. b = 0
  917. if strarray:
  918. while b < len(a) - 1:
  919. ab = a[b].strip()
  920. while (not self.bounded_string(ab) or
  921. (len(ab) > 2 and
  922. ab[0] == ab[1] == ab[2] and
  923. ab[-2] != ab[0] and
  924. ab[-3] != ab[0])):
  925. a[b] = a[b] + ',' + a[b + 1]
  926. ab = a[b].strip()
  927. if b < len(a) - 2:
  928. a = a[:b + 1] + a[b + 2:]
  929. else:
  930. a = a[:b + 1]
  931. b += 1
  932. else:
  933. al = list(a[1:-1])
  934. a = []
  935. openarr = 0
  936. j = 0
  937. for i in _range(len(al)):
  938. if al[i] == '[':
  939. openarr += 1
  940. elif al[i] == ']':
  941. openarr -= 1
  942. elif al[i] == ',' and not openarr:
  943. a.append(''.join(al[j:i]))
  944. j = i + 1
  945. a.append(''.join(al[j:]))
  946. for i in _range(len(a)):
  947. a[i] = a[i].strip()
  948. if a[i] != '':
  949. nval, ntype = self.load_value(a[i])
  950. if atype:
  951. if ntype != atype:
  952. raise ValueError("Not a homogeneous array")
  953. else:
  954. atype = ntype
  955. retval.append(nval)
  956. return retval
  957. def preserve_comment(self, line_no, key, comment, beginline):
  958. pass
  959. def embed_comments(self, idx, currentlevel):
  960. pass
  961. class TomlPreserveCommentDecoder(TomlDecoder):
  962. def __init__(self, _dict=dict):
  963. self.saved_comments = {}
  964. super(TomlPreserveCommentDecoder, self).__init__(_dict)
  965. def preserve_comment(self, line_no, key, comment, beginline):
  966. self.saved_comments[line_no] = (key, comment, beginline)
  967. def embed_comments(self, idx, currentlevel):
  968. if idx not in self.saved_comments:
  969. return
  970. key, comment, beginline = self.saved_comments[idx]
  971. currentlevel[key] = CommentValue(currentlevel[key], comment, beginline,
  972. self._dict)